# CSV export endpoint (hosted on data.cityofnewyork.us) for the shooting data.
url_in <- "https://data.cityofnewyork.us/api/views/833y-fsy8/rows.csv?accessType=DOWNLOAD"

# Download and parse the CSV; column types are left for read_csv() to guess.
shootings_raw <- read_csv(file = url_in)
Rows: 29744 Columns: 21
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (12): OCCUR_DATE, BORO, LOC_OF_OCCUR_DESC, LOC_CLASSFCTN_DESC, LOCATION...
dbl (5): INCIDENT_KEY, PRECINCT, JURISDICTION_CODE, Latitude, Longitude
num (2): X_COORD_CD, Y_COORD_CD
lgl (1): STATISTICAL_MURDER_FLAG
time (1): OCCUR_TIME
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the raw table to sanity-check the import.
head(shootings_raw)
INCIDENT_KEY OCCUR_DATE OCCUR_TIME BORO LOC_OF_OCCUR_DESC PRECINCT
# Per-column summary of the raw table (ranges, classes, NA counts).
summary(shootings_raw)
INCIDENT_KEY OCCUR_DATE OCCUR_TIME BORO
Min. : 9953245 Length:29744 Length:29744 Length:29744
1st Qu.: 67321140 Class :character Class1:hms Class :character
Median :109291972 Mode :character Class2:difftime Mode :character
Mean :133850951 Mode :numeric
3rd Qu.:214741917
Max. :299462478
LOC_OF_OCCUR_DESC PRECINCT JURISDICTION_CODE LOC_CLASSFCTN_DESC
Length:29744 Min. : 1.00 Min. :0.0000 Length:29744
Class :character 1st Qu.: 44.00 1st Qu.:0.0000 Class :character
Mode :character Median : 67.00 Median :0.0000 Mode :character
Mean : 65.23 Mean :0.3181
3rd Qu.: 81.00 3rd Qu.:0.0000
Max. :123.00 Max. :2.0000
NA’s :2
LOCATION_DESC STATISTICAL_MURDER_FLAG PERP_AGE_GROUP
Length:29744 Mode :logical Length:29744
Class :character FALSE:23979 Class :character
Mode :character TRUE :5765 Mode :character
PERP_SEX PERP_RACE VIC_AGE_GROUP VIC_SEX
Length:29744 Length:29744 Length:29744 Length:29744
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
VIC_RACE X_COORD_CD Y_COORD_CD Latitude
Length:29744 Min. : 914928 Min. :125757 Min. :40.51
Class :character 1st Qu.:1000094 1st Qu.:183042 1st Qu.:40.67
Mode :character Median :1007826 Median :195506 Median :40.70
Mean :1009442 Mean :208722 Mean :40.74
3rd Qu.:1016739 3rd Qu.:239980 3rd Qu.:40.83
Max. :1066815 Max. :271128 Max. :40.91
NA’s :97
Longitude Lon_Lat
Min. :-74.25 Length:29744
1st Qu.:-73.94 Class :character
Median :-73.91 Mode :character
Mean :-73.91
3rd Qu.:-73.88
Max. :-73.70
NA’s :97
# Compact column-by-column overview of the raw table.
glimpse(shootings_raw)
Rows: 29,744 Columns: 21 $ INCIDENT_KEY
# List the raw column names; the cleaning step refers to them by name.
colnames(shootings_raw)
[1] “INCIDENT_KEY” “OCCUR_DATE”
[3] “OCCUR_TIME” “BORO”
[5] “LOC_OF_OCCUR_DESC” “PRECINCT”
[7] “JURISDICTION_CODE” “LOC_CLASSFCTN_DESC”
[9] “LOCATION_DESC” “STATISTICAL_MURDER_FLAG”
[11] “PERP_AGE_GROUP” “PERP_SEX”
[13] “PERP_RACE” “VIC_AGE_GROUP”
[15] “VIC_SEX” “VIC_RACE”
[17] “X_COORD_CD” “Y_COORD_CD”
[19] “Latitude” “Longitude”
[21] “Lon_Lat”
# Build the analysis table from the raw import:
#   1. parse OCCUR_DATE (month/day/year text) into a Date,
#   2. turn borough, sex and race columns into factors,
#   3. bucket each incident into a multi-year Period label,
#   4. drop the columns not used downstream,
#   5. give the remaining demographic/date columns display-friendly names.
shootings_clean <- shootings_raw %>%
  mutate(
    OCCUR_DATE = mdy(OCCUR_DATE),
    across(c(BORO, PERP_SEX, PERP_RACE, VIC_SEX, VIC_RACE), as.factor),
    # Clauses are checked top to bottom, so each upper bound implicitly
    # starts the day after the previous one. Rows with a missing date, or a
    # date before 2006, end up with an NA Period.
    Period = case_when(
      OCCUR_DATE < as.Date("2006-01-01") ~ NA_character_,
      OCCUR_DATE <= as.Date("2010-12-31") ~ "2006–2010",
      OCCUR_DATE <= as.Date("2015-12-31") ~ "2011–2015",
      OCCUR_DATE <= as.Date("2020-12-31") ~ "2016–2020",
      OCCUR_DATE >= as.Date("2021-01-01") ~ "2021–2024"
    )
  ) %>%
  # Remove identifiers, time-of-day, location descriptors, the murder flag
  # and the projected/combined coordinate columns.
  select(
    -c(
      INCIDENT_KEY, OCCUR_TIME, LOC_OF_OCCUR_DESC, PRECINCT,
      JURISDICTION_CODE, LOC_CLASSFCTN_DESC, LOCATION_DESC,
      STATISTICAL_MURDER_FLAG, X_COORD_CD, Y_COORD_CD, Lon_Lat
    )
  ) %>%
  rename(
    `Occur Date` = OCCUR_DATE,
    `Perp Age Group` = PERP_AGE_GROUP,
    `Perp Sex` = PERP_SEX,
    `Perp Race` = PERP_RACE,
    `Victim Age Group` = VIC_AGE_GROUP,
    `Victim Sex` = VIC_SEX,
    `Victim Race` = VIC_RACE
  )
Occur Date BORO Perp Age Group Perp Sex
Min. :2006-01-01 BRONX : 8834 Length:29744 (null): 1628
1st Qu.:2009-10-29 BROOKLYN :11685 Class :character F : 461
Median :2014-03-25 MANHATTAN : 3977 Mode :character M :16845
Mean :2014-10-31 QUEENS : 4426 U : 1500
3rd Qu.:2020-06-29 STATEN ISLAND: 822 NA’s : 9310
Max. :2024-12-31
Perp Race Victim Age Group Victim Sex
BLACK :12323 Length:29744 F: 2891
WHITE HISPANIC: 2667 Class :character M:26841
UNKNOWN : 1838 Mode :character U: 12
(null) : 1628
BLACK HISPANIC: 1487
(Other) : 491
NA’s : 9310
Victim Race Latitude Longitude
AMERICAN INDIAN/ALASKAN NATIVE: 13 Min. :40.51 Min. :-74.25
ASIAN / PACIFIC ISLANDER : 478 1st Qu.:40.67 1st Qu.:-73.94
BLACK :20999 Median :40.70 Median :-73.91
BLACK HISPANIC : 2930 Mean :40.74 Mean :-73.91
UNKNOWN : 72 3rd Qu.:40.83 3rd Qu.:-73.88
WHITE : 741 Max. :40.91 Max. :-73.70
WHITE HISPANIC : 4511 NA’s :97 NA’s :97
Period
Length:29744
Class :character
Mode :character
Rows: 29,744 Columns: 11 $ Occur Date Perp Age Group Perp Sex Perp Race Victim Age Group Victim Sex
Victim Race
[5] “Perp Race” “Victim Age Group” “Victim Sex” “Victim Race”
[9] “Latitude” “Longitude” “Period”
# Keep only incidents that have both coordinates; rows missing either one
# cannot be placed on the map.
shooting_coords <- shootings_raw %>%
  filter(!(is.na(Longitude) | is.na(Latitude)))

# Interactive map: one small, semi-transparent red dot per incident, with a
# popup showing the date, borough, and victim/perpetrator sex taken from the
# raw (uncleaned) columns.
leaflet(data = shooting_coords) %>%
  addProviderTiles(provider = "CartoDB.Positron") %>%
  addCircleMarkers(
    lat = ~Latitude,
    lng = ~Longitude,
    popup = ~paste0(
      "Date: ", OCCUR_DATE,
      "<br>Borough: ", BORO,
      "<br>Victim Sex: ", VIC_SEX,
      "<br>Perpetrator Sex: ", PERP_SEX
    ),
    color = "red",
    radius = 2,
    fillOpacity = 0.2,
    stroke = FALSE
  ) %>%
  addLegend(
    position = "bottomright",
    colors = "red",
    labels = "Shooting Incident"
  )
| Year | Borough | Population |
|---|---|---|
| 2000 | Manhattan | 1537195 |
| 2000 | Brooklyn | 2465326 |
| 2000 | Queens | 2229379 |
| 2000 | Bronx | 1332650 |
| 2000 | Staten Island | 443728 |
| 2010 | Manhattan | 1585873 |
| 2010 | Brooklyn | 2504700 |
| 2010 | Queens | 2230722 |
| 2010 | Bronx | 1385108 |
| 2010 | Staten Island | 468730 |
| 2020 | Manhattan | 1694251 |
| 2020 | Brooklyn | 2736074 |
| 2020 | Queens | 2405464 |
| 2020 | Bronx | 1472654 |
| 2020 | Staten Island | 495747 |
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
generated.
| Year | Manhattan | Brooklyn | Queens | Bronx | Staten Island |
|---|---|---|---|---|---|
| 2000 | 19.2 | 30.8 | 27.8 | 16.6 | 5.5 |
| 2010 | 19.4 | 30.6 | 27.3 | 16.9 | 5.7 |
| 2020 | 19.2 | 31.1 | 27.3 | 16.7 | 5.6 |
# Bar chart of total shooting incidents per borough, with bars ordered by
# their count. Rows whose borough is missing or empty are excluded first.
shootings_clean %>%
  filter(!(is.na(BORO) | BORO == "")) %>%
  count(BORO) %>%
  ggplot(mapping = aes(x = reorder(BORO, n), y = n, fill = BORO)) +
  # Fill colour repeats the x axis, so the legend is suppressed.
  geom_col(show.legend = FALSE) +
  ggtitle("Number of Shooting Incidents by Borough") +
  xlab("Borough") +
  ylab("Total Shootings") +
  theme_minimal(base_size = 14) +
  # Tilt the borough names so the longer labels do not collide.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
| Borough | Number of Shootings | Percent of Total (%) |
|---|---|---|
| BROOKLYN | 11685 | 39.3 |
| BRONX | 8834 | 29.7 |
| QUEENS | 4426 | 14.9 |
| MANHATTAN | 3977 | 13.4 |
| STATEN ISLAND | 822 | 2.8 |
| Borough | Observed | Expected |
|---|---|---|
| Bronx | 8834 | 4988 |
| Brooklyn | 11685 | 9173 |
| Manhattan | 3977 | 5734 |
| Queens | 4426 | 8172 |
| Staten Island | 822 | 1676 |
Chi-squared test for given probabilities
data: shootings_obs$Observed
X-squared = 6344.6, df = 4, p-value < 2.2e-16

## Observed vs. Expected Counts by Borough